#encoding=utf-8
import jieba.posseg as pseg

def cutWord(content):
    """Segment ``content`` and count how often each kept word occurs.

    The text is tokenized with ``pseg.cut`` (jieba part-of-speech
    segmentation). Every token whose part-of-speech flag is in the
    exclusion list below is discarded; the remaining words are
    de-duplicated and their term frequency (TF) is accumulated.

    Args:
        content: The text to segment; passed unchanged to ``pseg.cut``.

    Returns:
        A dict mapping each kept word to the number of times it appeared
        in ``content``. Empty when no token survives the filter.
    """
    # Part-of-speech flags to filter out. Presumably these are jieba's tags
    # for punctuation/non-words, particles, conjunctions, prepositions,
    # numerals, pronouns, adverbs, interjections, affixes, locality words
    # and English tokens -- confirm against the jieba tag set.
    excluded_flags = frozenset((
        "x", "uj", "c", "p",
        "u", "m", "r", "d",
        "y", "e", "h", "k",
        "ul", "uz", "f", "eng",
    ))

    word_dic = {}
    for w in pseg.cut(content):
        if w.flag in excluded_flags:
            continue
        # dict.get replaces dict.has_key, which no longer exists in
        # Python 3; this form works on both Python 2 and Python 3.
        word_dic[w.word] = word_dic.get(w.word, 0) + 1
    return word_dic
